********************************************************************************
*
*
* Process teacher educator data
*
* author: MC
*
* last updated: 2020-2-6
*
*		2020-2-13; for NC, if a district is in multiple counties, now assigning to both
*		2020-4-27: for all states, assign to most recent county
*
********************************************************************************

cap log close
log using "$logs/cr-synth-county-tch.log", replace

// Focus on race, because that's the main breakdown we have across states.

********************************************************************************
* California
********************************************************************************

	use "$raw/teacher_demo/paif04-18_district_nces.dta", clear
	keep if inrange(schendyr,2009,2016)
	assert _merge==3
	rename schendyr schoolyear
	isid leaid schoolyear
	keep leaid dp_white_tch dp_black_tch schoolyear dp_teach_fte

	assert dp_white_tch+ dp_black_tch<=1 if !mi(dp_white_tch, dp_black_tch)
	rename dp_black pctblk
	rename dp_white pctwht

	replace pctblk = pctblk*dp_teach
	replace pctwht = pctwht*dp_teach
	gen pctoth = dp_teach-(pctblk+pctwht)
	rename dp_teach tottch
	collapse (sum) tottch pct*, by(leaid)
	renvars pct*, presub(pct tot)

	gen fips=6

	// Assign most recent county, as is done in SEDA (2015-16)
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="CALIFORNIA"
		keep leaid coname???? fipsco????
		reshape long coname fipsco, i(leaid) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)
		tab nvals if tag // <1%
		gsort leaid -year
		collapse (firstnm) fipsco, by(leaid)
		tempfile tempca
		save `tempca'
	restore
	merge 1:1 leaid using `tempca', assert(2 3) keep(3) nogen

		isid leaid
		collapse (sum) tot???, by(fipsco fips)
		
		foreach var in wht blk oth {
			replace tot`var' = tot`var'/tottch
			rename tot`var' pct`var'
			}
	 
	save "$data/ca_tch_demo.dta", replace

********************************************************************************
* Florida
********************************************************************************

	// 13-14
	import excel "$raw/teacher_demo/florida/0069354-staff1314-instructional.xls", ///
		sheet("Total Teacher by Race Gender") cellrange(A5:S81) firstrow case(lower) clear
		
		renvars d-j, prefix(m_)
		renvars l-r, prefix(f_)
		rename male m_wht
		rename female f_wht
		rename m_d m_blk
		rename m_e m_hisp
		rename m_f m_asian
		rename m_g m_native
		rename m_h m_pi
		rename m_i m_multi
		rename m_j m_tot
		rename f_l f_blk
		rename f_m f_hisp
		rename f_n f_asian
		rename f_o f_native
		rename f_p f_pi
		rename f_q f_multi
		rename f_r f_tot
		drop if mi(district)
		destring *, replace
		keep districtnumber district ?_wht ?_blk grandtotal
		gen totblk = f_blk+m_blk
		gen totwht = f_wht+m_wht
		gen tototh = grandtotal-(totblk+totwht)
		rename grandtotal tottch
		keep districtnum district tot*
		gen yr = 2014
		tempfile fl14
		save `fl14'
		
	// 14-15
	import excel "$raw/teacher_demo/florida/GABX-14152-121014-Instructional-Staff-Web.xls", ///
		sheet("Total Teacher by Race Gender") cellrange(A5:S81) firstrow case(lower) clear
		
		renvars d-j, prefix(m_)
		renvars l-r, prefix(f_)
		rename male m_wht
		rename female f_wht
		rename m_d m_blk
		rename m_e m_hisp
		rename m_f m_asian
		rename m_g m_native
		rename m_h m_pi
		rename m_i m_multi
		rename m_j m_tot
		rename f_l f_blk
		rename f_m f_hisp
		rename f_n f_asian
		rename f_o f_native
		rename f_p f_pi
		rename f_q f_multi
		rename f_r f_tot
		drop if mi(district)
		destring *, replace
		keep districtnumber district ?_wht ?_blk grandtotal
		gen totblk = f_blk+m_blk
		gen totwht = f_wht+m_wht
		gen tototh = grandtotal-(totblk+totwht)
		rename grandtotal tottch
		keep districtnum district tot*
		gen yr = 2015
		tempfile fl15
		save `fl15'
		
	// 15-16
	import excel "$raw/teacher_demo/florida/Instructional15162.xls", ///
		sheet("Total Teachers by Race Gender") cellrange(A5:S81) firstrow case(lower) clear
		
		renvars d-j, prefix(m_)
		renvars l-r, prefix(f_)
		rename male m_wht
		rename female f_wht
		rename m_d m_blk
		rename m_e m_hisp
		rename m_f m_asian
		rename m_g m_native
		rename m_h m_pi
		rename m_i m_multi
		rename m_j m_tot
		rename f_l f_blk
		rename f_m f_hisp
		rename f_n f_asian
		rename f_o f_native
		rename f_p f_pi
		rename f_q f_multi
		rename f_r f_tot
		drop if mi(district)
		destring *, replace
		rename disrictnumber districtnumber
		keep districtnumber district ?_wht ?_blk grandtotal
		gen totblk = f_blk+m_blk
		gen totwht = f_wht+m_wht
		gen tototh = grandtotal-(totblk+totwht)
		rename grandtotal tottch
		keep districtnum district tot*
		gen yr = 2016

		append using `fl14'
		append using `fl15'
		
		reshape wide tottch totblk totwht tototh, i(districtnumber district) j(yr)
		egen rm = rowmiss(*)
		tab rm // 90% have all data. 
		
		foreach var in tch blk wht oth {
			egen tot`var' = rowtotal(tot`var'*)
			}
		gen flag_miss_data = rm !=0
		drop rm
		drop tot???20??
		
		gen fips = 12
		rename district leaname
		
		isid leaname
	
	// Assign most recent county, as is done in SEDA (2015-16)
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="FLORIDA"
		keep leaid coname???? fipsco???? leaname
		reshape long coname fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		assert nvals==1
		keep leaname leaid fipsco
		duplicates drop
		isid leaid
		tempfile tempfl
		save `tempfl'
	restore
	reclink leaname using ///
		`tempfl', idmaster(districtnumber) idusing(leaid) gen(match)

	assert match==1 | leaname=="FLORIDA"
	drop _merge match
			
	isid leaname
	
	collapse (sum) tot??? (max) flag_miss_data, by(fipsco fips)
	
	// How far off are those missing data?
	egen rt = rowtotal(totblk totwht tototh)
	gen diff = tottch-rt
	sum diff, detail // Off by very little.
	replace tottch = rt
	drop rt diff flag_miss_data
	
	foreach var in wht blk oth {
		replace tot`var' = tot`var'/tottch
		rename tot`var' pct`var'
		} 
				
	save "$data/fl_tch_demo.dta", replace
		
********************************************************************************
* New York
********************************************************************************
	
	import excel "$raw/teacher_demo/StaffRaceandEthnicityStatistics12-31-19.xlsx", ///
		sheet("TeacherRacePre2018") firstrow case(lower) clear
	keep if record_type_code != 1
	assert inlist(record_type_desc,"CHARTER SCHOOLS (IMF)","PUBLIC SCHOOL DISTRICT (IMF)")
		
	replace school_year = substr(school_year,1,4)
	destring school_year, replace
	keep if inrange(school_year,2008,2015)
	
	keep school_year county_code county_desc record_type_code record_type_desc total_type district_code district_name total white blackorafricanamerican
	rename white totwht
	rename black totblk
	rename total tottch
	gen tototh = tottch-(totblk+totwht)
	isid district_code school_year
	collapse (sum) tot???, by(county_code county_desc)
	gen fips=36
	
	rename county_desc coname
	destring county_code, replace
		
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="NEW YORK"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//MANUAL
		replace fipsco= 36005 if coname=="BRONX COUNTY"
		replace fipsco=36081 if coname =="QUEENS COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		replace coname = regexr(coname," COUNTY","")
		tempfile tempny
		save `tempny'
	restore
	reclink coname using ///
		`tempny', idmaster(county_code) idusing(fipsco) gen(match)
	drop _merge match
	isid county_code
	isid fipsco
	
	keep tot??? fipsco fips
		
	foreach var in wht blk oth {
		replace tot`var' = tot`var'/tottch
		rename tot`var' pct`var'
		}
		
	save "$data/ny_tch_demo.dta", replace

********************************************************************************
* Pennsylvania
********************************************************************************

	import excel "$raw/teacher_demo/Complete-Data-File-for-Report.xlsx", ///
		sheet("County-Level") firstrow case(lower) clear
	keep county teacherstotal teacherswhite teachersblack
	rename teacherswhite totwht
	rename teachersblack totblk
	rename teacherstotal tottch
	replace totblk= totblk*tottch
	replace totwht = totwht*tottch
	gen tototh = tottch-(totwht+totblk)
	replace county = upper(county)
	rename county coname
	isid coname
	gen id=_n
		
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="PENNSYLVANIA"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//manula
		replace fipsco= 42029 if coname=="CHESTER COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		replace coname = regexr(coname," COUNTY","")
		tempfile temppa
		save `temppa'
	restore
	reclink coname using ///
		`temppa', idmaster(id) idusing(fipsco) gen(match)
	assert match==1
	drop _merge match
	isid coname
	isid fipsco
		
	keep tot??? fipsco fips
	
	foreach var in wht blk oth {
		replace tot`var' = tot`var'/tottch
		rename tot`var' pct`var'
		}

	gen fips = 42
	
	save "$data/pa_tch_demo.dta", replace
	
********************************************************************************
* New Jersey
********************************************************************************

	forvalues y=2009/2016 {
		di "`y'"
		import excel using "$raw/teacher_demo/new jersey/county_demo_`y'.xlsx", clear firstrow case(lower)
		replace position = upper(position)
		replace sex = upper(sex)
		keep if position=="TEACHER"
		keep if sex=="TOTAL"
		keep county white black total
		destring *, replace
		rename white totwht
		rename black totblk
		rename total tottch
		gen tototh = tottch-(totblk+totwht)
		drop if county=="STATE SUM" | county=="Charter"
		replace county= upper(county)
		gen y = `y'
		tempfile nj`y'
		save `nj`y''
		}
	use `nj2009', clear
	forvalues y=2010/2016 {
		append using `nj`y''
		}
	isid county y	
	drop if inlist(county,"NEW JERSEY","STATE SUMMARY","CHARTERS")
	collapse (sum) tot???, by(county)
	replace county = upper(county)
	rename county coname
	gen id=_n
	
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="NEW JERSEY"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//manula
		replace fipsco= 34013 if coname=="ESSEX COUNTY"
		replace fipsco = 34015 if coname=="GLOUCESTER COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		drop if fipsco==.
		replace coname = regexr(coname," COUNTY","")
		tempfile tempnj
		save `tempnj'
	restore
	reclink coname using ///
		`tempnj', idmaster(id) idusing(fipsco) gen(match)
	assert match==1
	drop _merge match
	isid fipsco	
	keep tot??? fipsco fips

	foreach var in wht blk oth {
		replace tot`var' = tot`var'/tottch
		rename tot`var' pct`var'
		}
	
	gen fips=34
	
	save "$data/nj_tch_demo.dta", replace
	
********************************************************************************
* Tennessee
********************************************************************************

	import excel using "$raw/teacher_demo/data_2017_educator_race_ethnicity_by_district.xlsx", clear firstrow case(lower)
	keep if staff=="Teacher"
	drop if mi(districtnumber)
	keep white blackorafricanamerican districtnum districtname
	isid districtnum
	preserve
		import excel using "$raw/teacher_demo/tennessee_2016-17_fte.xlsx", clear firstrow case(lower)
		tempfile temptn
		save `temptn'
	restore
	replace districtname = upper(districtname)
	rename districtname leaname
	reclink leaname using ///
		`temptn', idmaster(districtnumber) idusing(leaid) gen(match)		
	assert match==1
	drop match _merge
	isid leaname
	rename black totblk
	rename white totwht
	gen tototh = 1-(totblk+totwht)
	foreach var in wht blk oth {
		replace tot`var' = tot`var'*tottch
		}
	collapse (sum) tot???, by(fipsco)
	destring fipsco, replace
	
	gen fips = 47
	foreach var in wht blk oth {
		replace tot`var' = tot`var'/tottch
		rename tot`var' pct`var'
		}
	
	save "$data/tn_tch_demo.dta", replace

********************************************************************************
* Arizona
********************************************************************************

	import excel using "$raw/teacher_demo/arizona/data_entry_tch_demo_county_09-16.xlsx", clear firstrow case(lower)
	// This was manually entered from PDFs by MC
	
	replace countyname = upper(countyname)
	isid year countycode
	collapse (sum) white black total, by(countyname countycode)
	rename white totwht
	rename black totblk
	rename total tottch
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen tototh = tottch-totwht-totblk
	gen pctoth = tototh/tottch
	rename countyname coname 
	isid coname
	
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="ARIZONA"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//manula
		replace fipsco= 4021 if coname=="PINAL COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		drop if fipsco==.
		replace coname = regexr(coname," COUNTY","")
		tempfile tempaz
		save `tempaz'
	restore
	reclink coname using ///
		`tempaz', idmaster(countycode) idusing(fipsco) gen(match)
	assert match==1
	drop _merge match
	isid countycode
		
	gen fips = 4
	keep fipsco pctblk pctwht tottch pctoth fips
	
	save "$data/az_tch_demo.dta", replace

********************************************************************************
* Minnesota
********************************************************************************

	forvalues y=2009/2014 {
		local ym1=`y'-1
		
		import excel using "$raw/teacher_demo/minnesota//`ym1'-`y'_Demographics.xlsx", clear firstrow case(lower) sheet("Teachers")
		renvars *, lower
		assert mi(b[1],b[2],b[3],b[4])
		assert h[5]=="CountyNumber" & i[5]=="CountyName"
		assert g[5]=="SchName"
		assert k[5]=="TotFTE" 
		assert t[5]=="BlackMale"
		assert u[5]=="BlackFemale"
		assert v[5]=="WhiteMale"
		assert w[5]=="WhiteFemale"
		replace g= upper(g)
		tab g if regexm(g,"TOTA")
		replace g = trim(g)
		keep if g=="TOTALS"
		
		rename h countynumber
		rename i coname
		rename k tottch
		rename t blkm
		rename u blkf
		rename v whtm
		rename w whtf
		
		isid d
		
		keep countynumber coname tottch blkm blkf whtm whtf
		destring *, replace
		collapse (sum) tottch blkm blkf whtm whtf, by(countynumber coname)
		replace coname = trim(coname)
		replace coname = upper(coname)
		assert countynumber==0 if mi(coname)
		drop if mi(coname)

		gen year = `y'
		tempfile y`y'
		save `y`y''
		}
	forvalues y=2015/2016 {
		local ym1=`y'-1
		
		import excel using "$raw/teacher_demo/minnesota//`ym1'-`y'_Demographics.xlsx", clear firstrow case(lower) sheet("Teachers")
		renvars *, lower
		assert mi(b[1],b[2],b[3],b[4])
		assert h[3]=="CountyNumber" & i[3]=="CountyName"
		assert g[3]=="SchName"
		assert k[3]=="TotFTE" 
		assert t[3]=="BlackMale"
		assert u[3]=="BlackFemale"
		assert v[3]=="WhiteMale"
		assert w[3]=="WhiteFemale"
		replace g= upper(g)
		tab g if regexm(g,"TOTA")
		replace g = trim(g)
		keep if g=="TOTALS"
		
		rename h countynumber
		rename i coname
		rename k tottch
		rename t blkm
		rename u blkf
		rename v whtm
		rename w whtf
		
		isid d
		
		keep countynumber coname tottch blkm blkf whtm whtf
		destring *, replace
		collapse (sum) tottch blkm blkf whtm whtf, by(countynumber coname)
		replace coname = trim(coname)
		replace coname = upper(coname)
		assert countynumber==0 if mi(coname)
		drop if mi(coname)

		gen year = `y'
		tempfile y`y'
		save `y`y''
		}
	use `y2009', clear
	forvalues y=2010/2016 {
		append using `y`y''
		}
	isid year coname
	gen totwht = whtm+whtf
	gen totblk = blkm+blkf
	gen tototh = tottch-totwht-totblk
	collapse (sum) tot???, by(countynumber coname)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="MINNESOTA"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//manula
		replace fipsco= 27037 if coname=="DAKOTA COUNTY"
		replace fipsco=27117 if coname=="PIPESTONE COUNTY"
		replace fipsco=27123 if coname=="RAMSEY COUNTY"
		replace fipsco=27145 if coname=="STEARNS COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		drop if fipsco==.
		replace coname = regexr(coname," COUNTY","")
		tempfile tempmn
		save `tempmn'
	restore
	reclink coname using ///
		`tempmn', idmaster(countynumber) idusing(fipsco) gen(match)
		assert coname=="UNKNOWN" if _merge!=3
		keep if _merge==3
		assert match==1
		drop _merge match
		isid fipsco
	gen fips = 27	
	keep fipsco pctblk pctwht tottch pctoth fips
		
	save "$data/mn_tch_demo.dta", replace

********************************************************************************
* Kentucky
********************************************************************************

	forvalues y=2014/2015 {
		import excel using "$raw/teacher_demo/kentucky/LEARNING_ENVIRONMENT_STUDENTS-TEACHERS`y'.xlsx", clear firstrow case(lower) 
		replace sch_name = upper(sch_name)
		tab sch_name if regexm(sch_name,"TOTAL")
		keep if regexm(sch_name,"DISTRICT TOTAL")
		keep sch_year cntyno cntyname dist_number dist_name sch_number sch_name sch_cd state_sch_id ncesid fte_tch_total white_fte_total black_fte_total
		replace white_fte_total = regexr(white_fte_total,",","")
		replace fte_tch_total = regexr(fte_tch_total,",","")
		destring *, replace
		isid dist_number
		collapse (sum) *fte*, by(cntyno cntyname sch_year)
		tempfile y`y'
		save `y`y''
		}
	use `y2014', clear
	append using `y2015'
	rename white totwht
	rename black totblk
	rename fte tottch
	gen tototh = tottch-totwht-totblk
	isid cntyno sch_year

	collapse (sum) tot*, by(cntyno cntyname)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	rename cntyname coname
	
	// Get county names/codes from CCD	
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="KENTUCKY"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		assert nvals==1 | nvals ==.
		collapse (firstnm) fipsco, by(coname)  
		drop if fipsco==.
		replace coname = regexr(coname," COUNTY","")
		tempfile tempky
		save `tempky'
	restore
	reclink coname using ///
		`tempky', idmaster(cntyno) idusing(fipsco) gen(match)
	assert match==1
	assert !mi(fipsco)
	drop _merge match
	isid coname
	gen fips = 21	
	keep fipsco pctblk pctwht tottch pctoth fips
			
	save "$data/ky_tch_demo.dta", replace

********************************************************************************
* DC
********************************************************************************

	import excel using "$raw/teacher_demo/dcps2018-19.xlsx", clear firstrow
	// Manually input from a PDF
	egen tottch = rowtotal(black white hisp asian other notreported)
	collapse (sum) black white tottch
	rename black totblk
	rename white totwht
	gen tototh = tottch-totwht-totblk
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 11
	gen fipsco = 11001
	keep fipsco pctblk pctwht tottch pctoth fips
	save "$data/dc_tch_demo.dta", replace

********************************************************************************
* Arkansas
********************************************************************************

	forvalues y= 2009/2016 {
		import excel using "$raw/teacher_demo/arkansas/FY`y'_Counties_CertifiedTeacherByRace.xlsx", clear firstrow
		renvars *, lower
		assert certifiedteacheronlybyrace[1]=="Location County ID" & b[1]=="Location County Name" & e[1]=="Black/African American" & i[1]=="White" & j[1]=="Total"
		keep certifiedteacheronlybyrace b e i j
		drop if _n==1
		rename certifiedteacheronlybyrace countycode
		rename b coname
		rename e totblk
		rename i totwht
		rename j tottch
		gen year = `y'
		destring *, replace
		tempfile y`y'
		save `y`y''
	}
	use `y2009', clear
	forvalues y=2010/2016 {
		append using `y`y''
		}
	egen rm =rowmiss(*)
	drop if rm!=0
	replace coname = trim(coname)
	isid countycode year
	collapse (sum) tot*, by(coname countycode)

	gen tototh = tottch-totwht-totblk
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	destring *, replace
	
	// Get county names/codes from CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="ARKANSAS"
		keep coname???? fipsco????
		gen temp = _n
		reshape long coname fipsco, i(temp) j(year)
		replace coname = upper(coname)
		drop temp year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if coname=="†"
		bys coname: egen nvals = nvals(fipsco)
		//manula
		replace fipsco=5147 if coname=="WOODRUFF COUNTY"
		collapse (firstnm) fipsco, by(coname) 
		drop if fipsco==.
		replace coname = regexr(coname," COUNTY","")
		tempfile tempar
		save `tempar'
	restore
	reclink coname using ///
		`tempar', idmaster(countycode) idusing(fipsco) gen(match)
	list if match!=1
	drop _merge match
	isid coname
	gen fips = 5
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/ar_tch_demo.dta", replace
	
********************************************************************************
* Alabama
********************************************************************************

	import excel using "$raw/teacher_demo/alabama/2014-2015 Educator Demographics.xlsx", clear firstrow 
	renvars *, lower
	assert alabamastatedepartmentofeduc[4]=="SystemCode" & b[4]=="SystemName" & c[4]=="SchoolCode" & e[4]=="GenderCode" & g[4]=="RaceCode" & k[4]=="EducatorSubPopulationCode" & l[4]=="EducatorSubPopulationName" & m[4] == "demographicCount" & h[4]=="RaceName"
	assert i[4]=="EthnicityCode" & j[4]=="EthnicityName"
	keep alabamastatedepartmentofeduc b e g k l m c h i j
	rename alabamastatedepartmentofeduc leaid
	rename b leaname
	rename c schid
	rename e gender
	rename g race
	rename h racedesc
	rename i eth
	rename j ethdesc
	rename k type
	rename l typedesc
	rename m tot
	keep if _n > 4
	destring *, replace
	tab gender
	keep if gender=="ALL"
	tab race racedesc
	keep if inlist(race,"ALL","B","W")
	tab type typedesc
	keep if type=="TC"
	tab eth ethdesc
	keep if eth=="ALL"
	isid leaid schid race
	drop gender racedesc eth ethdesc type typedesc
	replace race = "blk" if race=="B"
	replace race = "tch" if race=="ALL"
	replace race= "wht" if race=="W"
	reshape wide tot, i(leaid leaname schid) j(race) string
	replace totblk = 0 if mi(totblk)
	replace totwht = 0 if mi(totwht)
	assert !mi(tottch)
	gen tototh = tottch-totblk-totwht
	isid schid leaid
	collapse (sum) tot*,by(leaid leaname)
	gen year = 2015
	tempfile y2015
	save `y2015'

	import excel using "$raw/teacher_demo/alabama/2015-2016 Educator Demographics.xlsx", clear firstrow 
	renvars *, lower
	assert alabamastatedepartmentofeduc[4]=="SystemCode" & b[4]=="SystemName" & c[4]=="SchoolCode" & e[4]=="GenderCode" & g[4]=="RaceCode" & k[4]=="EducatorSubPopulationCode" & l[4]=="EducatorSubPopulationName" & m[4] == "demographicCount" & h[4]=="RaceName"
	assert i[4]=="EthnicityCode" & j[4]=="EthnicityName"
	keep alabamastatedepartmentofeduc b e g k l m c h i j
	rename alabamastatedepartmentofeduc leaid
	rename b leaname
	rename c schid
	rename e gender
	rename g race
	rename h racedesc
	rename i eth
	rename j ethdesc
	rename k type
	rename l typedesc
	rename m tot
	keep if _n > 4
	destring *, replace
	tab gender
	keep if gender=="ALL"
	tab race racedesc
	keep if inlist(race,"ALL","B","W")
	tab type typedesc
	keep if type=="TC"
	tab eth ethdesc
	keep if eth=="ALL"
	isid leaid schid race
	drop gender racedesc eth ethdesc type typedesc
	replace race = "blk" if race=="B"
	replace race = "tch" if race=="ALL"
	replace race= "wht" if race=="W"
	reshape wide tot, i(leaid leaname schid) j(race) string
	replace totblk = 0 if mi(totblk)
	replace totwht = 0 if mi(totwht)
	assert !mi(tottch)
	gen tototh = tottch-totblk-totwht
	isid schid leaid
	collapse (sum) tot*,by(leaid leaname)
	gen year = 2016
	append using `y2015'

	replace leaname = upper(leaname)
	bys leaid leaname: assert _N == 2
	isid leaid year
	collapse (sum) tot*,by(leaid leaname)
	rename leaid distid
	
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="ALABAMA"
		keep leaname leaid coname???? fipsco????
		reshape long coname fipsco, i(leaname leaid) j(year)
		replace coname = upper(coname)
		drop year
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		replace coname = "" if coname=="†"
		bys leaid: egen nvals = nvals(fipsco)
		tab nvals, m
		tempfile tempal
		save `tempal'
	restore
	merge 1:m leaname using `tempal'
	assert leaname == "ALABAMA STATE DEPARTMENT OF EDUCATION" if _merge==1
	keep if _merge==3
	
	assert nvals == 1 | mi(nvals)
	bys leaid: egen max = max(fipsco)
	assert !mi(max)
	collapse (firstnm) fipsco coname, by(distid leaname tot*)
	isid distid
	collapse (sum) tot*,by(fipsco coname)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 1
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/al_tch_demo.dta", replace

********************************************************************************
* South Carolina
********************************************************************************

	import excel using "$raw/teacher_demo/SOUTH CAROLINA TEACHERS BY RACE AND GENDER 2015-16.xlsx", clear firstrow
	renvars *, lower
	keep white* black* totalstaff district
	egen totwht = rowtotal(white*)
	egen totblk = rowtotal(black*)
	rename totalstaff tottch
	gen tototh = tottch-totwht-totblk
	drop white* black*
	drop if mi(district)
	bys district: assert _N==1
	gen id = _n
	rename district leaname

	drop if tottch==. & totblk+totwht==0
	drop if leaname=="STATE TOTAL"
	replace leaname ="BEAUFORT 1" if leaname=="BEAUFORT"
	isid leaname
	
	// Merge to CCD
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="SOUTH CAROLINA"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)
		tab nvals if tag // <1%
		assert leaname=="SC PUBLIC CHARTER SCHOOL DISTRICT" if nvals==2 // couldn't say which
		drop if nvals==2
		keep fipsco leaid leaname
		duplicates drop
		isid leaid
		
			// manual edits to make it merge
			forvalues n=1/9 {
				replace leaname = regexr(leaname,"0`n'","`n'")
				}
			replace leaname = trim(leaname)
			
		gen temp = substr(leaname,-2,.)
		destring temp, force replace
		split leaname,p(" ")
		replace leaname1 = "" if mi(temp)
		bys leaname1: egen nvals2= nvals(temp)
		replace leaname = leaname1 if nvals2==1
		drop temp leaname? nvals2
			
		tempfile tempsc
		save `tempsc'
	restore

	merge 1:1 leaname using `tempsc', keep(1 3)

		// Manually replace counties
		replace fipsco = 45007 if leaname == "ANDERSON 1 & 2 CAREER"
		replace fipsco = 45007 if leaname == "ANDERSON ALTERNATIVE SCHOOL"
		replace fipsco = 45011 if leaname == "BARNWELL CO AVC"
		replace fipsco = 45053 if leaname == "BEAUFORT-JASPER CAREER"
		replace fipsco = 45075 if leaname == "COPE AVC"
		replace fipsco = 45083 if leaname == "DANIEL MORGAN VOC"
		replace fipsco = 45033 if leaname == "DILLON COUNTY TECHNOLOGY"
		replace fipsco = 45035 if leaname == "DORCHESTER CAREER SCHOOL"
		replace fipsco = 45027 if leaname == "F E DUBOSE AVC"
		replace fipsco = 45075 if leaname == "FELTON LAB"
		replace fipsco = 45047 if leaname == "GREENWOOD CO AVC"
		replace fipsco = 45083 if leaname == "H B SWOFFORD"
		replace fipsco = 45079 if leaname == "PALMETTO UNIFIED"
		replace fipsco = 45083 if leaname == "R D ANDERSON TECH"
		replace fipsco = 45085 if leaname == "SUMTER"
		replace fipsco = 45083 if leaname == "SCH FOR DEAF & BLIND"
		
		replace _merge=3 if leaname == "ANDERSON 1 & 2 CAREER"
		replace _merge=3 if leaname == "ANDERSON ALTERNATIVE SCHOOL"
		replace _merge=3 if leaname == "BARNWELL CO AVC"
		replace _merge=3 if leaname == "BEAUFORT-JASPER CAREER"
		replace _merge=3 if leaname == "COPE AVC"
		replace _merge=3 if leaname == "DANIEL MORGAN VOC"
		replace _merge=3 if leaname == "DILLON COUNTY TECHNOLOGY"
		replace _merge=3 if leaname == "DORCHESTER CAREER SCHOOL"
		replace _merge=3 if leaname == "F E DUBOSE AVC"
		replace _merge=3 if leaname == "FELTON LAB"
		replace _merge=3 if leaname == "GREENWOOD CO AVC"
		replace _merge=3 if leaname == "H B SWOFFORD"
		replace _merge=3 if leaname == "PALMETTO UNIFIED"
		replace _merge=3 if leaname == "R D ANDERSON TECH"
		replace _merge=3 if leaname == "SUMTER"
		replace _merge=3 if leaname == "SCH FOR DEAF & BLIND"
		
	tab leaname if _merge==1
	keep if _merge==3

	bys id: egen max = max(fipsco)
	assert !mi(max)
	bys id: egen nvals = nvals(fipsco)
	assert nvals==1
	isid id

	collapse (sum) tot*, by(fipsco)

	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 45
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/sc_tch_demo.dta", replace
	
********************************************************************************
* Connecticut
********************************************************************************

	forvalues y=2009/2016{
		insheet using "$raw/teacher_demo/connecticut/StaffingRace`y'.csv", comma clear
		rename v1 district
		rename v2 race
		rename v3 count
		rename v4 pct
		keep if _n>=3

		destring count pct, replace
		egen id = group(district)
		bys id: egen tot = total(pct)
		sum tot
		drop tot
		bys id: egen tottch=total(count)

		tab race
		keep if regexm(race,"Black")|regexm(race,"White")
		replace race = "blk" if regexm(race,"Black")
		replace race = "wht" if regexm(race,"White")
		rename count tot
		drop pct
		reshape wide tot, i(district id tottch) j(race) string
		replace totblk = 0 if mi(totblk)
		replace totwht = 0 if mi(totwht)
		gen tototh = tottch-totblk-totwht
		replace district = upper(district)
		gen year=`y'
		renvars tot*, suffix(`y')
		save "$raw/teacher_demo/connecticut/staff`y'.dta", replace
	}
	
	use "$raw/teacher_demo/connecticut/staff2009.dta", clear
	replace district="ELM CITY COLLEGE PREPARATORY SCHOOL DISTRICT" if district=="ELM CITY COLLEGE PREPARATORY SCHOOL"
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2010.dta", nogen
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2011.dta", nogen
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2012.dta", assert(3) nogen
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2014.dta", nogen
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2015.dta", nogen
	merge 1:1 district using "$raw/teacher_demo/connecticut/staff2016.dta", nogen
	drop year id
	isid district
	egen id=group(district)
	reshape long totblk totwht tottch tototh, i(district id) j(year)
	collapse (sum) tot*, by(district id)
	
	// Manual prep for fuzzy merge
	rename district leaname
	replace leaname = "BRIDGEPORT ACHIEVEMENT FIRST" if leaname=="ACHIEVEMENT FIRST BRIDGEPORT ACADEMY DISTRICT"
	replace leaname = "ACHIEVEMENT FIRST HARTFORD AC" if leaname=="ACHIEVEMENT FIRST HARTFORD ACADEMY DISTRICT"
	replace leaname = "AREA COOPERATIVE EDUCATIONAL" if leaname=="AREA COOPERATIVE EDUCATIONAL SERVICES"
	replace leaname = "BOOKER T WASHINGTON ACADEMY D" if leaname=="BOOKER T. WASHINGTON ACADEMY DISTRICT"
	replace leaname = "BRASS CITY CHARTER SCHOOL DIS" if leaname=="BRASS CITY CHARTER SCHOOL DISTRICT"
	replace leaname = "CAPITAL PREPARATORY HARBOR SC" if leaname=="CAPITAL PREPARATORY HARBOR SCHOOL DISTRICT"
	replace leaname = "CAPITOL REGION EDUCATION COUN" if leaname=="CAPITOL REGION EDUCATION COUNCIL"
	replace leaname = "CHARTER SCHOOL FOR YOUNG CHIL" if leaname=="CHARTER SCHOOL FOR YOUNG CHILDREN ON ASYLUM HILL DISTRICT"
	replace leaname = "COMMON GROUND HIGH SCHOOL DIS" if leaname=="COMMON GROUND HIGH SCHOOL DISTRICT"
	replace leaname = "CONNECTICUT TECHNICAL HIGH SC" if leaname=="CONNECTICUT TECHNICAL EDUCATION AND CAREER SYSTEM"
	replace leaname = "COOPERATIVE EDUCATIONAL SERVI" if leaname=="COOPERATIVE EDUCATIONAL SERVICES"
	replace leaname = "DEPARTMENT OF MENTAL HEALTH A" if leaname=="DEPARTMENT OF MENTAL HEALTH AND ADDICTION SERVICES"
	replace leaname = "EASTERN CONNECTICUT REGIONAL" if leaname=="EASTERN CONNECTICUT REGIONAL EDUCATIONAL SERVICE CENTER (EASTCONN)"
	replace leaname = "ELM CITY COLLEGE PREPARATORY" if leaname=="ELM CITY COLLEGE PREPARATORY SCHOOL DISTRICT"
	replace leaname = "ELM CITY MONTESSORI SCHOOL DI" if leaname=="ELM CITY MONTESSORI SCHOOL DISTRICT"
	replace leaname = "GREAT OAKS CHARTER SCHOOL BRI" if leaname=="GREAT OAKS CHARTER SCHOOL DISTRICT"
	replace leaname = "HIGHVILLE CHARTER SCHOOL" if leaname=="HIGHVILLE CHARTER SCHOOL DISTRICT"
	replace leaname = "INTEGRATED DAY CHARTER SCHOOL" if leaname=="INTEGRATED DAY CHARTER SCHOOL DISTRICT"
	replace leaname = "INTRDIST SCH FOR ARTS AND COM" if leaname=="INTERDISTRICT SCHOOL FOR ARTS AND COMM DISTRICT"
	replace leaname = "NEW BEGINNINGS INC." if leaname=="NEW BEGINNINGS INC FAMILY ACADEMY DISTRICT"
	replace leaname = "NORTH BRANFORD SCHOOL DISTRIC" if leaname=="NORTH BRANFORD SCHOOL DISTRICT"
	replace leaname = "NORTH STONINGTON SCHOOL DISTR" if leaname=="NORTH STONINGTON SCHOOL DISTRICT"
	replace leaname = "NORWICH FREE ACADEMY" if leaname=="NORWICH FREE ACADEMY DISTRICT"
	replace leaname = "ODYSSEY COMMUNITY SCHOOL DIST" if leaname=="ODYSSEY COMMUNITY SCHOOL DISTRICT"
	replace leaname = "PARK CITY PREP CHARTER SCHOOL" if leaname=="PARK CITY PREP CHARTER SCHOOL DISTRICT"
	replace leaname = "SIDE BY SIDE COMMUNITY SCHOOL" if leaname=="SIDE BY SIDE CHARTER SCHOOL DISTRICT"
	replace leaname = "STAMFORD ACADEMY" if leaname=="STAMFORD ACADEMY DISTRICT"
	replace leaname = "STAMFORD CHARTER SCHOOL FOR E" if leaname=="STAMFORD CHARTER SCHOOL FOR EXCELLENCE DISTRICT"
	replace leaname = "THE GILBERT SCHOOL" if leaname=="THE GILBERT SCHOOL DISTRICT"
	replace leaname = "WOODSTOCK ACADEMY" if leaname=="THE WOODSTOCK ACADEMY DISTRICT"
	
	// Assign counties
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="CONNECTICUT"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		assert nvals==1
		drop nvals
		keep leaname leaid fipsco
		duplicates drop
		isid leaname
		tempfile tempct
		save `tempct'
	restore
	merge 1:1 leaname using `tempct', keep(1 3)
	
		// Manually replace county
		replace fipsco= 9005 if leaname=="EDADVANCE"
		replace fipsco= 9011 if leaname=="PROJECT OCEANOLOGY"
		tab leaname if mi(fipsco)
		// looks ok. 
		
	keep if !mi(fipsco)
	isid leaname
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 9
	keep fipsco pctblk pctwht tottch pctoth fips
	
	save "$data/ct_tch_demo.dta", replace
	
********************************************************************************
* North Carolina
********************************************************************************

	insheet using "$raw/teacher_demo/personnel_summary.csv", comma clear name
	keep if inrange(year, 2009,2016)
	egen tottch = rowtotal(genderwhite genderblack genderother)
	isid leaname year assignmentclassification
	collapse (sum) genderwhite genderblack genderother tottch, by(lea leaname)
	renvars gender*, presub(gender tot)
	rename totwhite totwht
	rename totblack totblk
	rename totother tototh
	assert totwht+totblk+tototh==tottch
	replace leaname = upper(leaname)
	isid lea
	
	// Prep for manual merge
	replace leaname = "EXPLORIS CHARTER" if leaname=="EXLORIS CHARTER"	
	replace leaname ="ANDERSON CREEK ACADEMY" if leaname =="ANDERSON CREEK CLUB CHARTER SCHOOL"
	replace leaname ="ARAPAHOE CHARTER SCHOOL" if leaname =="ARAPAHOE CHARTER"
	replace leaname ="ARTS BASED SCHOOL" if leaname =="ARTS BASED ELEMENTARY"
	replace leaname ="BRIDGES ACADEMY" if leaname =="BRIDGES CHARTER"
	replace leaname ="CAROLINA INTERNATIONAL SCHOOL" if leaname =="CAROLINA INTERNATIONAL"
	replace leaname ="CARTER COMMUNITY CHARTER" if leaname =="CARTER COMMUNITY"
	replace leaname ="CARTER G WOODSON SCHOOL" if leaname =="CARTER G WOODSON"
	replace leaname ="CARTERET COUNTY PUBLIC SCHOOLS" if leaname =="CARTERET COUNTY SCHOOLS"
	replace leaname ="CENTRAL PARK SCHOOL FOR CHILDREN" if leaname =="CENTRAL PARK FOR CHILDREN"
	replace leaname ="CHAPEL HILL-CARRBORO CITY SCHOOLS" if leaname =="CHAPEL-HILL/CARRBORO CITY SCHOOLS"
	replace leaname ="CHARLOTTE SECONDARY SCHOOL" if leaname =="CHARLOTTE SECONDARY"
	replace leaname ="CHARLOTTE-MECKLENBURG SCHOOLS" if leaname =="CHARLOTTE-MECKLENBURG COUNTY SCHOOLS"
	replace leaname ="CHARTER DAY SCHOOL" if leaname =="CHARTER DAY"
	replace leaname ="CHARTER DAY SCHOOL" if leaname =="COLUMBUS CHARTER"
	replace leaname ="CHARTER DAY SCHOOL" if leaname =="CROSSCREEK CHARTER"
	replace leaname ="DURHAM PUBLIC SCHOOLS" if leaname =="DURHAM COUNTY SCHOOLS"
	replace leaname ="EDENTON-CHOWAN SCHOOLS" if leaname =="EDENTON-CHOWAN COUNTY SCHOOLS"
	replace leaname ="EDGECOMBE COUNTY PUBLIC SCHOOLS" if leaname =="EDGECOMBE COUNTY SCHOOLS"
	replace leaname ="THE EXPLORIS SCHOOL" if leaname =="EXPLORIS CHARTER"
	replace leaname ="WINSTON SALEM / FORSYTH COUNTY SCHOOLS" if leaname =="FORSYTH COUNTY SCHOOLS"
	replace leaname ="FRANCINE DELANY NEW SCHOOL" if leaname =="FRANCINE DELANY"
	replace leaname ="GRAY STONE DAY SCHOOL" if leaname =="GRAY STONE DAY"
	replace leaname ="GUILFORD PREPARATORY ACADEMY" if leaname =="GUILFORD PREPARATORY"
	replace leaname ="HEALTHY START ACADEMY" if leaname =="HEALTHY START"
	replace leaname ="HOPE CHARTER LEADERSHIP ACADEMY" if leaname =="HOPE ELEMENTARY"
	replace leaname ="JACKSON COUNTY PUBLIC SCHOOLS" if leaname =="JACKSON COUNTY SCHOOLS"
	replace leaname ="JOHNSTON COUNTY PUBLIC SCHOOLS" if leaname =="JOHNSTON COUNTY SCHOOLS"
	replace leaname ="KESTREL HEIGHTS SCHOOL" if leaname =="KESTREL HEIGHTS"
	replace leaname ="LAKE LURE CLASSICAL ACADEMY" if leaname =="LAKE LURE CLASSICAL"
	replace leaname ="LENOIR COUNTY PUBLIC SCHOOLS" if leaname =="LENOIR COUNTY SCHOOLS"
	replace leaname ="LINCOLN CHARTER SCHOOL" if leaname =="LINCOLN CHARTER"
	replace leaname ="MAUREEN JOY CHARTER" if leaname =="MAUREEN JOY"
	replace leaname ="METROLINA REGIONAL SCHOLARS ACADEMY" if leaname =="METROLINA REG SCHOLARS ACADEMY"
	replace leaname ="MOORESVILLE GRADED SCHOOL DISTRICT" if leaname =="MOORESVILLE CITY SCHOOLS"
	replace leaname ="MOUNTAIN DISCOVERY CHARTER SCHOOL" if leaname =="MOUNTAIN DISCOVERY"
	replace leaname ="NEUSE CHARTER SCHOOL" if leaname =="NEUSE CHARTER"
	replace leaname ="NEWTON CONOVER CITY SCHOOLS" if leaname =="NEWTON-CONOVER CITY SCHOOLS"
	replace leaname ="NORTH EAST CAROLINA PREPARATORY SCHOOL" if leaname =="NORTH EAST CAROLINA PREP SCHOOL"
	replace leaname ="NORTHEAST REGIONAL SCHOOL - BIOTECH/AGRI" if leaname =="NORTHEAST REGIONAL SCHOOL OF BIOTECHNOLOGY AND AGRISCIENCE"
	replace leaname ="OXFORD PREPARATORY SCHOOL" if leaname =="OXFORD PREPARATORY HIGH SCHOOL"
	replace leaname ="PAUL R BROWN LEADERSHIP ACADEMY" if leaname =="PAUL R. BROWN LEADERSHIP ACADEMY"
	replace leaname ="PHOENIX ACADEMY INC" if leaname =="PHOENIX ACADEMY"
	replace leaname ="PIEDMONT COMMUNITY CHARTER" if leaname =="PIEDMONT COMMUNITY"
	replace leaname ="PREEMINENT CHARTER SCHOOL" if leaname =="PREEMINENT CHARTER"
	replace leaname ="QUEEN'S GRANT COMMUNITY SCHOOL" if leaname =="QUEEN'S GRANT COMMUNITY"
	replace leaname ="RANDOLPH COUNTY SCHOOL SYSTEM" if leaname =="RANDOLPH COUNTY SCHOOLS"
	replace leaname ="RESEARCH TRIANGLE CHARTER" if leaname =="RESEARCH TRIANGLE"
	replace leaname ="ROWAN-SALISBURY SCHOOLS" if leaname =="ROWAN-SALISBURY COUNTY SCHOOLS"
	replace leaname ="SALLIE B HOWARD SCHOOL" if leaname =="SALLIE B HOWARD"
	replace leaname ="SANDHILLS THEATRE ARTS RENAISS" if leaname =="SANDHILLS THEATRE ARTS RENAISSANCE"
	replace leaname ="SOUTHERN WAKE ACADEMY" if leaname =="SOUTHERN WAKE ACADEMY (FORMERLY COMMUNITY PARTNERS)"
	replace leaname ="STEM EDUCATION FOR A GLOBAL SOCIETY-SEGS" if leaname =="STEM EDUCATION FOR A GLOBAL SOCIETY ACAD"
	replace leaname ="SUCCESS CHARTER SCHOOL" if leaname =="SUCCESS INSTITUTE"
	replace leaname ="SUGAR CREEK CHARTER" if leaname =="SUGAR CREEK"
	replace leaname ="THE COLLEGE PREPARATORY AND LEADERSHIP A" if leaname =="THE COLLEGE PREPARATORY AND LEADERSHIP ACADEMY OF HIGH POINT"
	replace leaname ="THE MOUNTAIN COMMUNITY SCH" if leaname =="THE MOUNTAIN COMMUNITY"
	replace leaname ="THE NEW DIMENSIONS SCHOOL" if leaname =="THE NEW DIMENSIONS"
	replace leaname ="TRIAD MATH AND SCIENCE ACADEMY" if leaname =="TRIAD MATH & SCIENCE"
	replace leaname ="TRIANGLE MATH AND SCIENCE ACADEMY" if leaname =="TRIANGLE MATH SCIENCE ACADEMY"
	replace leaname ="UNION ACADEMY CHARTER SCHOOL" if leaname =="UNION ACADEMY"
	replace leaname ="UNION COUNTY PUBLIC SCHOOLS" if leaname =="UNION COUNTY SCHOOLS"
	replace leaname ="VANCE CHARTER SCHOOL" if leaname =="VANCE CHARTER"
	replace leaname ="WATER'S EDGE VILLAGE SCHOOL" if leaname =="WATERS EDGE VILLAGE SCHOOL"
	replace leaname ="WAYNE COUNTY PUBLIC SCHOOLS" if leaname =="WAYNE COUNTY SCHOOLS"
	replace leaname ="WILMINGTON PREPARATORY ACADEMY" if leaname =="WILMINGTON PREPARATORY"
	replace leaname ="WOODS CHARTER SCHOOL" if leaname =="WOODS CHARTER"
	replace leaname ="ELIZABETH CITY-PASQUOTANK PUBLIC SCHOOLS" if leaname =="PASQUOTANK COUNTY SCHOOLS"
	replace leaname ="PUBLIC SCHOOLS OF ROBESON COUNTY" if leaname =="ROBESON COUNTY SCHOOLS"
	replace leaname ="NC LEADERSHIP CHARTER ACADEMY" if leaname =="THE NORTH CAROLINA LEADERSHIP ACADEMY"
	
	bys leaname: assert _N==1 if leaname != "CHARTER DAY SCHOOL"
	collapse (sum) tot???, by(leaname)
	sort leaname
	gen id = _n
	
	// Assign most recent county
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="NORTH CAROLINA"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)	
		tab nvals if tag // 10%
		gsort leaid -year
		collapse (firstnm) fipsco, by(leaid leaname)
		tempfile tempnc
		save `tempnc'
	restore
	merge 1:m leaname using `tempnc', keep(1 3)
	
		// MANUAL REPLACE
		replace fipsco = 37031 if leaname == "CAPE LOOKOUT MARINE SCIENCE HIGH SCHOOL"
		replace fipsco = 37119 if leaname == "CHILDREN' COMMUNITY SCHOOL"
		replace fipsco = 37011 if leaname == "CROSSNORE ACADEMY"
		replace fipsco = 37047 if leaname == "FLEMINGTON ACADEMY"
		replace fipsco = 37135 if leaname == "ORANGE CHARTER" 
		replace fipsco = 37067 if leaname == "THE DOWNTOWN MIDDLE"
		assert !mi(fipsco)
		
	bys leaname: assert _N ==1
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 37
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/nc_tch_demo.dta", replace

********************************************************************************
* Michigan
********************************************************************************

	import excel "$raw/teacher_demo/Fall_REP_State_Summaries_560670_7/REP_Summary_Race-Ethnicity.xlsx", ///
		sheet("Race-Ethnicity") cellrange(A3:CQ8730) firstrow case(lower) clear

	keep if inrange(year,2008,2015)
	egen tottch = rowtotal(tch_*)
	rename tch_white totwht
	rename tch_black totblk
	keep districtcode districtofficialname year isd totblk totwht tottch
	replace totblk = 0 if mi(totblk)
	replace totwht = 0 if mi(totwht)
	gen tototh = tottch-(totblk+totwht)
	isid districtcode isd year
	collapse (sum) tot???, by(districtcode districtofficialname isd)
	
	rename districtofficialname leaname
	replace leaname = upper(leaname)
	
	bys districtcode: egen nvals = nvals(isd)
	tab nvals // this seems like multiple campuses, can't really separate
	drop nvals
	bys districtcode: egen nvals = nvals(leaname)
	tab nvals
	
	collapse (sum) tot??? (firstnm) leaname, by(districtcode)
	
	replace leaname = "ADRIAN SCHOOL DISTRICT OF THE CITY OF" if leaname == "ADRIAN CITY SCHOOL DISTRICT"
	replace leaname = "AIRPORT COMMUNITY SCHOOLS" if leaname == "AIRPORT COMMUNITY SCHOOL DISTRICT"
	replace leaname = "ALLENDALE PUBLIC SCHOOLS" if leaname == "ALLENDALE PUBLIC SCHOOL DISTRICT"
	replace leaname = "BANGOR PUBLIC SCHOOLS (VAN BUREN)" if leaname == "BANGOR PUBLIC SCHOOLS"
	replace leaname = "BEAR LAKE SCHOOLS" if leaname == "BEAR LAKE SCHOOL DISTRICT"
	replace leaname = "BENTON HARBOR CHARTER SCHOOL ACADEMY" if leaname == "BENTON HARBOR CHARTER SCHOOL"
	replace leaname = "BINGHAM ARTS ACADEMY" if leaname == "BINGHAM ACADEMY"
	replace leaname = "BIRCH RUN AREA SCHOOLS" if leaname == "BIRCH RUN AREA SCHOOL DISTRICT"
	replace leaname = "BIRMINGHAM PUBLIC SCHOOLS" if leaname == "BIRMINGHAM CITY SCHOOL DISTRICT"
	replace leaname = "BLOOMFIELD HILLS SCHOOLS" if leaname == "BLOOMFIELD HILLS SCHOOL DISTRICT"
	replace leaname = "BRANDON SCHOOL DISTRICT IN THE COUNTIES OF OAKLAND AND LAPEE" if leaname == "BRANDON SCHOOL DISTRICT"
	replace leaname = "BUCKLEY COMMUNITY SCHOOLS" if leaname == "BUCKLEY COMMUNITY SCHOOL DISTRICT"
	replace leaname = "BUSINESS ENTREPRENEURSHIP SCIENCE TECH. ACADEMY" if leaname == "BUSINESS ENTREPRENEURSHIP, SCIENCE, TECH. ACADEMY"
	replace leaname = "CAPAC COMMUNITY SCHOOLS" if leaname == "CAPAC COMMUNITY SCHOOL DISTRICT"
	replace leaname = "COMMONWEALTH COMMUNITY DEVELOPMENT ACADEMY" if leaname == "COMMONWEALTH COMMUNITY DEVEL. ACADEMY"
	replace leaname = "CORNERSTONE HEALTH AND TECHNOLOGY SCHOOL" if leaname == "CORNERSTONE HEALTH SCHOOL"
	replace leaname = "CORUNNA PUBLIC SCHOOLS" if leaname == "CORUNNA PUBLIC SCHOOL DISTRICT"
	replace leaname = "COVENANT HOUSE ACADEMY DETROIT" if leaname == "COVENANT HOUSE ACADEMY CENTRAL"
	replace leaname = "EATON RESA" if leaname == "EATON ISD"
	replace leaname = "ECORSE PUBLIC SCHOOLS" if leaname == "ECORSE PUBLIC SCHOOL DISTRICT"
	replace leaname = "FLINT SCHOOL DISTRICT OF THE CITY OF" if leaname == "FLINT CITY SCHOOL DISTRICT"
	replace leaname = "HAMTRAMCK SCHOOL DISTRICT OF THE CITY OF" if leaname == "HAMTRAMCK PUBLIC SCHOOLS"
	replace leaname = "HARPER WOODS THE SCHOOL DISTRICT OF THE CITY OF" if leaname == "CITY OF HARPER WOODS SCHOOLS"
	replace leaname = "HARTFORD PUBLIC SCHOOLS" if leaname == "HARTFORD PUBLIC SCHOOL DISTRICT"
	replace leaname = "HAZEL PARK SCHOOL DISTRICT OF THE CITY OF" if leaname == "HAZEL PARK CITY SCHOOL DISTRICT"
	replace leaname = "HIGHLAND PARK PUBLIC SCHOOL ACADEMY SYSTEM" if leaname == "HIGHLAND PARK PUBLIC SCHOOL ACADEMY"
	replace leaname = "ICADEMY GLOBAL" if leaname == "ICADEMY"
	replace leaname = "IRONWOOD AREA SCHOOLS OF GOGEBIC COUNTY" if leaname == "IRONWOOD AREA SCHOOLS"
	replace leaname = "ISHPEMING PUBLIC SCHOOL DISTRICT NO. 1" if leaname == "ISHPEMING PUBLIC SCHOOL DISTRICT"
	replace leaname = "KALAMAZOO PUBLIC SCHOOLS" if leaname == "KALAMAZOO PUBLIC SCHOOL DISTRICT"
	replace leaname = "KENSINGTON WOODS SCHOOLS" if leaname == "KENSINGTON WOODS HIGH SCHOOL"
	replace leaname = "KINGSBURY COUNTRY DAY SCHOOL" if leaname == "KINGSBURY ACADEMY"
	replace leaname = "LAINGSBURG COMMUNITY SCHOOLS" if leaname == "LAINGSBURG COMMUNITY SCHOOL DISTRICT"
	replace leaname = "LAWRENCE PUBLIC SCHOOLS" if leaname == "LAWRENCE PUBLIC SCHOOL DISTRICT"
	replace leaname = "LEARN LIVE LEAD ACADEMY" if leaname == "LEARN, LIVE, LEAD ACADEMY"
	replace leaname = "LIFETECH ACADEMY" if leaname == "LIFE TECH ACADEMY"
	replace leaname = "LINCOLN PARK SCHOOL DISTRICT OF THE CITY OF" if leaname == "LINCOLN PARK PUBLIC SCHOOLS"
	replace leaname = "LIVONIA PUBLIC SCHOOLS SCHOOL DISTRICT" if leaname == "LIVONIA PUBLIC SCHOOLS"
	replace leaname = "MARTIN LUTHER KING JR. EDUCATION CENTER ACADEMY" if leaname == "MARTIN LUTHER KING, JR. EDUCATION CENTER ACADEMY"
	replace leaname = "MICHIGAN SCHOOL FOR THE DEAF" if leaname == "MICH SCHOOL F/T DEAF/MICH SCHOOL F/T BLIND-LIO"
	replace leaname = "MOREY MONTESSORI PUBLIC SCHOOL ACADEMY" if leaname == "MOREY CHARTER SCHOOL"
	replace leaname = "MUSKEGON HEIGHTS PUBLIC SCHOOL ACADEMY SYSTEM" if leaname == "MUSKEGON HEIGHTS PUBLIC SCHOOL ACADEMY"
	replace leaname = "MUSKEGON PUBLIC SCHOOLS OF THE CITY OF" if leaname == "MUSKEGON CITY SCHOOL DISTRICT"
	replace leaname = "NICE COMMUNITY SCHOOL DISTRICT" if leaname == "N.I.C.E. COMMUNITY SCHOOLS"
	replace leaname = "NILES COMMUNITY SCHOOLS" if leaname == "NILES COMMUNITY SCHOOL DISTRICT"
	replace leaname = "NORTHVIEW PUBLIC SCHOOLS" if leaname == "NORTHVIEW PUBLIC SCHOOL DISTRICT"
	replace leaname = "NORTHWESTERN MICHIGAN MIGRANT" if leaname == "NORTHVIEW PUBLIC SCHOOL DISTRICT"
	replace leaname = "OAK PARK SCHOOL DISTRICT OF THE CITY OF" if leaname == "OAK PARK CITY SCHOOL DISTRICT"
	replace leaname = "PERRY PUBLIC SCHOOLS" if leaname == "PERRY PUBLIC SCHOOL DISTRICT"
	replace leaname = "PLYMOUTH EDUCATIONAL CENTER CHARTER SCHOOL" if leaname == "PLYMOUTH EDUCATIONAL CENTER"
	replace leaname = "PORTLAND PUBLIC SCHOOLS" if leaname == "PORTLAND PUBLIC SCHOOL DISTRICT"
	replace leaname = "POSEN CONSOLIDATED SCHOOL DISTRICT NO. 9" if leaname == "POSEN CONSOLIDATED SCHOOL DISTRICT"
	replace leaname = "PUBLIC SCHOOLS OF CALUMET LAURIUM & KEWEENAW" if leaname == "PUBLIC SCHOOLS OF CALUMET"
	replace leaname = "QUINCY COMMUNITY SCHOOLS" if leaname == "QUINCY COMMUNITY SCHOOL DISTRICT"
	replace leaname = "REDFORD UNION SCHOOLS DISTRICT NO. 1" if leaname == "REDFORD UNION SCHOOL DISTRICT"
	replace leaname = "RIVER ROUGE SCHOOL DISTRICT OF THE CITY OF" if leaname == "RIVER ROUGE SCHOOL DISTRICT"
	replace leaname = "ROSS-HILL ACADEMY" if leaname == "ROSS HILL ACADEMY"
	replace leaname = "SAGINAW SCHOOL DISTRICT OF THE CITY OF" if leaname == "SAGINAW CITY SCHOOL DISTRICT"
	replace leaname = "SHEPHERD PUBLIC SCHOOLS" if leaname == "SHEPHERD PUBLIC SCHOOL DISTRICT"
	replace leaname = "ST. CLAIR COUNTY ACADEMY OF STYLE" if leaname == "ST CLAIR CO. ACADEMY OF STYLE"
	replace leaname = "SUMMERFIELD SCHOOLS" if leaname == "SUMMERFIELD SCHOOL DISTRICT"
	replace leaname = "UNIVERSITY PREPARATORY ACADEMY (PSAD)" if leaname == "UNIVERSITY PREPARATORY ACADEMY"
	replace leaname = "WHITEFORD AGRICULTURAL SCHOOL DISTRICT OF THE COUNTIES OF LE" if leaname == "WHITEFORD AGRICULTURAL SCHOOL DISTRICT OF THE COUNTIES OF LENAWEE AND MONROE"
	replace leaname = "WYANDOTTE SCHOOL DISTRICT OF THE CITY OF" if leaname == "WYANDOTTE CITY SCHOOL DISTRICT"
	replace leaname = "YPSILANTI COMMUNITY SCHOOLS" if leaname == "SCHOOL DISTRICT OF YPSILANTI"
	replace leaname = "HAMILTON ACADEMY" if leaname == "EMAN HAMILTON ACADEMY"
	replace leaname = "MULTICULTURAL ACADEMY" if leaname == "EASTERN WASHTENAW MULTICULTURAL ACADEMY"
	replace leaname = "MUSKEGON COVENANT ACADEMY" if leaname == "COVENANT HOUSE ACADEMY MUSKEGON"
	isid leaname
		
	// Assign most recent county
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="MICHIGAN"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)	
		tab nvals if tag // 1%
		gsort leaid -year
		collapse (firstnm) fipsco, by(leaid leaname)
		tempfile tempmi
		save `tempmi'
	restore
	merge 1:m leaname using `tempmi', keep(1 3)
		
		// MANUALLY REPLACE
		replace fipsco=26163 if leaname=="ACADEMY OF DETROIT-WEST"
		replace fipsco=26111 if leaname=="COLEMAN COMMUNITY SCHOOL DISTRICT"
		replace fipsco=26099 if leaname=="CONNER CREEK ACADEMY"
		replace fipsco=26105 if leaname=="MASON-LAKE ISD"
		replace fipsco=26131 if leaname=="WHITE PINE SCHOOL DISTRICT"
		assert !mi(fipsco)
	
	isid leaname
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	keep fipsco pctblk pctwht tottch pctoth fips
		
	gen fips = 26
	save "$data/mi_tch_demo.dta", replace

********************************************************************************
* Colorado
********************************************************************************

	import excel "$raw/teacher_demo/2018-19 Teachers by District, Race and Gender.xlsx", ///
		sheet("Sheet1") cellrange(A4:Y204) firstrow case(lower) clear
	keep organizationcode orgnazationname l r y
	rename l totblk
	rename r totwht
	rename y tottch
	gen tototh = tottch-(totblk+totwht)
	bys organizationcode: assert _N==1
	bys orgnazationname: assert _N==1
	rename orgnazationname leaname
	drop if mi(leaname)
	destring organizationcode, replace
	rename organizationcode stateid
	isid stateid
	
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk_ma_co_2018_w_state_id.xls", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="COLORADO"
		assert substr(stateid,1,3)=="CO-"
		replace stateid=substr(stateid,4,.)
		destring stateid,replace
		destring fipsco leaid, replace
		bys stateid: gen count = _N
		assert !mi(fipsco)
		rename leaname temp
		tempfile tempco
		save `tempco'
	restore
	merge 1:m stateid using `tempco', assert(2 3) keep(3) nogen
	tab count
	sort leaname
	
	drop if leaname=="ACADEMY 20" &  temp=="ADAMS 27J SCHOOL DISTRICT 27J"
	drop if leaname=="AKRON R-1" & temp == "ARAPAHOE 5 CHERRY CREEK"
	drop if leaname=="ARICKAREE R-2" & temp=="ARAPAHOE 6 LITTLETON"
	drop if leaname=="CHEYENNE MOUNTAIN 12" & temp=="ADAMS 12 NORTHGLENN"
	drop if leaname=="COLORADO SPRINGS 11" & temp=="ADAMS 1 MAPLETON"
	drop if leaname=="CRIPPLE CREEK-VICTOR RE-1" & temp == "ARAPAHOE 1 ENGLEWOOD"
	drop if leaname=="HANOVER 28" & temp=="WESTMINSTER PUBLIC SCHOOLS"
	drop if leaname=="LONE STAR 101" & temp=="ADAMS-ARAP 28J AURORA"
	drop if leaname=="MANITOU SPRINGS 14" & temp == "ADAMS 14 COMMERCE CITY"
	drop if leaname=="WOODLAND PARK RE-2" & temp=="ARAPAHOE 2 SHERIDAN"
	isid leaname
	isid stateid
	
	collapse (sum) tot*, by(fipsco coname)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
		
	gen fips = 8
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/co_tch_demo.dta", replace

********************************************************************************
* Massachusetts
********************************************************************************

	forvalues y=2009/2016 {
		import excel using "$raw/teacher_demo/massachusetts/staffracegender`y'.xlsx", clear firstrow case(lower)
		assert regexm(a,"Name")==1 if _n==1
		assert regexm(b,"Code")==1 if _n==1
		assert regexm(c,"African")==1 if _n==1
		assert regexm(f,"White")==1 if _n==1
		assert regexm(l,"FTE")==1 if _n==1
		
		rename a name
		rename b code
		rename c totblk
		rename f totwht
		rename l tottch
		keep name code tot???
		drop if _n==1
		destring *, replace
		gen y = `y'
		tempfile ma`y'
		save `ma`y''
		}
	use `ma2009', clear
	forvalues y=2010/2016 {
		append using `ma`y''
		}
	replace name = upper(name)
	foreach var in totwht totblk tottch {
		replace `var' = regexr(`var',",","")
		destring `var', replace
		}
	gen tototh = tottch-(totblk+totwht)
	isid y code
	isid y name
	gsort code -y
	collapse (sum) tot??? (firstnm) name, by(code)
	drop if code==0
	rename name leaname
	replace leaname = "BROOKE CHARTER SCHOOL (DISTRICT)" if leaname == "BROOKE CHARTER SCHOOL ROSLINDALE (DISTRICT)"
	replace leaname = "CITY ON A HILL CHARTER PUBLIC SCHOOL CIRCUIT STREET (DISTRIC" if leaname == "CITY ON A HILL CHARTER PUBLIC SCHOOL CIRCUIT STREET (DISTRICT)"
	replace leaname = "CITY ON A HILL CHARTER PUBLIC SCHOOL DUDLEY SQUARE (DISTRICT" if leaname == "CITY ON A HILL CHARTER PUBLIC SCHOOL DUDLEY SQUARE (DISTRICT)"
	replace leaname = "COLLEGIATE CHARTER SCHOOL OF LOWELL (DISTRICT)" if leaname == "LOWELL COLLEGIATE CHARTER SCHOOL (DISTRICT)"
	replace leaname = "COMMUNITY DAY CHARTER PUBLIC SCHOOL - R. KINGMAN WEBSTER (DI" if leaname == "COMMUNITY DAY CHARTER PUBLIC SCHOOL - R. KINGMAN WEBSTER (DISTRICT)"
	replace leaname = "EDWARD M. KENNEDY ACADEMY FOR HEALTH CAREERS (HORACE MANN CH" if leaname == "EDWARD M. KENNEDY ACADEMY FOR HEALTH CAREERS (HORACE MANN CHARTER) (DISTRICT)"
	replace leaname = "HAMPDEN CHARTER SCHOOL OF SCIENCE EAST (DISTRICT)" if leaname == "HAMPDEN CHARTER SCHOOL OF SCIENCE (DISTRICT)"
	replace leaname = "MARTIN LUTHER KING JR. CHARTER SCHOOL OF EXCELLENCE (DISTRIC" if leaname == "MARTIN LUTHER KING JR. CHARTER SCHOOL OF EXCELLENCE (DISTRICT)"
	replace leaname = "MASSACHUSETTS VIRTUAL ACADEMY AT GREENFIELD COMMONWEALTH VIR" if leaname == "MASSACHUSETTS VIRTUAL ACADEMY AT GREENFIELD COMMONWEALTH VIRTUAL DISTRICT"
	replace leaname = "PHOENIX ACADEMY PUBLIC CHARTER HIGH SCHOOL SPRINGFIELD (DIST" if leaname == "PHOENIX ACADEMY PUBLIC CHARTER HIGH SCHOOL SPRINGFIELD (DISTRICT)"
	replace leaname = "TRI-COUNTY REGIONAL VOCATIONAL TECHNICAL" if leaname == "TRI COUNTY REGIONAL VOCATIONAL TECHNICAL"
	isid leaname
	
	// Assign most recent county
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="MASSACHUSETTS"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)	
		tab nvals if tag // <1%
		gsort leaid -year
		collapse (firstnm) fipsco, by(leaid leaname)
		tempfile tempma
		save `tempma'
	restore
	merge 1:m leaname using `tempma', assert(2 3) keep(3) nogen
		
	isid leaname
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	gen fips = 25

	keep fipsco pctblk pctwht tottch pctoth fips
		
	save "$data/ma_tch_demo.dta", replace
	
********************************************************************************
* Nebraska
********************************************************************************

	import delimited "$raw/teacher_demo/Teachers_20152016.txt", clear
	keep if type=="DI"
	isid district county schoolyear
	egen rm = rowmiss(*_7)
	assert rm==0
	drop rm
	egen rt = rowtotal(*_7)
	gen diff = teacherfte-rt
		sum diff // basically the same
	drop rt diff
	egen tottch = rowtotal(*_7)
	rename wh_7 totwht
	rename bl_7 totblk
	gen tototh = tottch-totwht-totblk
	isid agencyname schoolyear
	collapse (sum) tot*, by(agencyname)
	rename agency leaname

	// Prep for merge
	replace leaname = "PAPILLION LA VISTA COMMUNITY SCHOOLS" if leaname=="PAPILLION-LA VISTA PUBLIC SCHS"
	// Assign most recent county
	preserve
		import excel using "$raw/teacher_demo/ccd_county_lea_xwalk.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="NEBRASKA"
		keep leaid leaname fipsco????
		reshape long fipsco, i(leaid leaname) j(year)
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring fipsco, force replace
		drop if mi(fipsco)
		bys leaid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)	
		tab nvals if tag // 3%
		gsort leaid -year
		collapse (firstnm) fipsco, by(leaid leaname)
		tempfile tempne
		save `tempne'
	restore
	merge 1:m leaname using `tempne', keep(1 3) 

		// MANUAL
		replace fipsco=31011 if leaname=="CEDAR RAPIDS PUBLIC SCHOOLS"
		replace fipsco=31027 if leaname=="DORCHESTER PUBLIC SCHOOLS"
		replace fipsco=31027 if leaname=="HARTINGTON PUBLIC SCHOOLS"
		replace fipsco=31027 if leaname=="OSMOND PUBLIC SCHOOLS"
		replace fipsco=31153 if leaname=="SOUTH SARPY DIST 46"
		tab leaname if mi(fipsco) // Multiple counties
		
	keep if !mi(fipsco)
	bys leaid: gen count = _N
	assert count == 1 | mi(leaid)
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	
	gen fips = 31
	keep fipsco pctblk pctwht tottch pctoth fips

	save "$data/ne_tch_demo.dta", replace	

********************************************************************************
* Wisconsin
********************************************************************************

	import excel using "$raw/teacher_demo/wisconsin/sedl09.xls", clear sheet("Staff Ethnicity Dist Licensed")
	renvars *, lower
	assert a[1]=="Dist Code" & b[1]=="District" & c[1]=="Position" & f[1] == "Black, not Hispanic" & l[1]=="White, not Hispanic" & o[1]=="Total"
	keep a b c f g l m o
	rename a distnum
	rename b leaname
	rename c pos
	rename f blf
	rename g blm
	rename l whf
	rename m whm
	rename o tottch
	tab pos
	keep if pos=="Teacher"
	destring *, replace
	egen totblk = rowtotal(blf blm)
	egen totwht = rowtotal(whf whm)
	gen tototh = tottch-totblk-totwht

	isid distnum 
	gen year = 2009
	tempfile y2009
	save `y2009'
	
	import excel using "$raw/teacher_demo/wisconsin/sedl10.xls", clear sheet("Staff Ethnicity Dist Licensed")
	renvars *, lower
	assert a[1]=="Dist Code" & b[1]=="District" & c[1]=="Position" & f[1] == "Black, not Hispanic" & l[1]=="White, not Hispanic" & o[1]=="Total"
	keep a b c f g l m o
	rename a distnum
	rename b leaname
	rename c pos
	rename f blf
	rename g blm
	rename l whf
	rename m whm
	rename o tottch
	tab pos
	keep if pos=="Teacher"
	destring *, replace
	egen totblk = rowtotal(blf blm)
	egen totwht = rowtotal(whf whm)
	gen tototh = tottch-totblk-totwht

	isid distnum 
	gen year = 2010
	tempfile y2010
	save `y2010'

	forvalues y=2011/2012 {
		local tempy = `y'-2000
		import excel using "$raw/teacher_demo/wisconsin/sedl`tempy'.xls", clear sheet("SEDL`tempy' Data")
		renvars *, lower
		assert b[4]=="Dist No" & c[4]=="District" & d[4]=="Position" & i[2] == "Black or African" & o[3]=="White (W)" & t[4]=="Total"
		keep b c d i j o p t
		rename b distnum
		rename c leaname
		rename d pos
		rename i blf
		rename j blm
		rename o whf
		rename p whm
		rename t tottch
		tab pos
		keep if pos=="Teacher"
		destring *, replace
		egen totblk = rowtotal(blf blm)
		egen totwht = rowtotal(whf whm)
		gen tototh = tottch-totblk-totwht

		isid distnum 
		gen year = `y'
		tempfile y`y'
		save `y`y''
		}

	forvalues y=2013/2016 {
		di "`y'"
		local tempy = `y'-2000
		import excel using "$raw/teacher_demo/wisconsin/sedl`tempy'.xlsx", clear sheet("SEDL`tempy' Data")
		renvars *, lower
		assert b[4]=="Dist Nbr" & c[4]=="District Name" & d[4]=="Position Name" & i[2] == "Black or African" & o[3]=="White (W)" & t[4]=="Total"
		keep b c d i j o p t
		rename b distnum
		rename c leaname
		rename d pos
		rename i blf
		rename j blm
		rename o whf
		rename p whm
		rename t tottch
		tab pos
		keep if pos=="Teacher"
		destring *, replace
		egen totblk = rowtotal(blf blm)
		egen totwht = rowtotal(whf whm)
		gen tototh = tottch-totblk-totwht
		
		drop if leaname=="Wis Dept of Public Instruction"

		isid distnum 
		gen year = `y'
		tempfile y`y'
		save `y`y''
		}

use `y2009', clear
forvalues y=2010/2016 {
	append using `y`y''
	}
	
	bys distnum: egen nvals = nvals(leaname)
	// looks all right for the most part
	isid distnum year
	
		bys distnum: egen max = max(year)
		tab max
		drop max
	
	gsort distnum -year
	collapse (sum) tot* (firstnm) leaname, by(distnum)
	replace leaname = upper(leaname)
	rename leaname tempname
	rename distnum stateid
	isid stateid
	
	preserve
		import excel using "$raw/teacher_demo/wisconsin/ccd_county_lea_xwalk_wi_w_state_id.xlsx", firstrow case(lower) clear
		replace state = upper(state)
		keep if state=="WISCONSIN"
		renvars countyname*, presub(countyname coname)
		keep leaname leaid coname???? fipsco???? stateid????
		reshape long coname fipsco stateid, i(leaname leaid) j(year)
		replace coname = upper(coname)
		duplicates drop
		destring fipsco, gen(temp) force
		tab fipsco if mi(temp)
		drop temp
		destring stateid, gen(temp) force
		tab stateid if mi(temp)
		drop temp
		destring fipsco stateid, force replace
		replace coname = "" if coname=="†"
		bys stateid: egen nvals = nvals(fipsco)
		egen tag = tag(leaid)
		tab nvals if tag //< 1%
		gsort leaid -year
		collapse (firstnm) fipsco, by(stateid)
		tempfile tempwi
		save `tempwi'
	restore
	merge 1:1 stateid using `tempwi', assert(2 3) keep(3) nogen
	assert !mi(fipsco)
		
	isid stateid fipsco
	collapse (sum) tot*, by(fipsco)
	gen pctblk = totblk/tottch
	gen pctwht = totwht/tottch
	gen pctoth = tototh/tottch
	
	gen fips = 55
	keep fipsco pctblk pctwht tottch pctoth fips
	save "$data/wi_tch_demo.dta", replace	

	
********************************************************************************
* Merge for synth data
********************************************************************************
	
	use "$data/ca_tch_demo.dta", clear
	foreach var in fl ny pa nj tn az mn ky dc ar al sc ct nc mi co ma ne wi {
		append using "$data/`var'_tch_demo.dta"
		}
	reshape long pct, i(fips fipsco tottch) j(race) string
	rename pct prop
	replace race = substr(race,1,2) if race!="oth"
	rename fipsco countyid
	saveold "$data/teacher_synth_county.dta", replace version(12)

log close
